# Reading the pre-prepared dataset
all <- read_excel("che_covid19.xlsx")[,2:40] %>% relocate(mortality, .before = `Country Code`)
all
# There are some extra variables, but we will not use all of them
to_use <- c("mortality", "che", "beds", "pop65", "popdens", "urban", "dphe", "dghe", "tobacco", "procur", "doctors", "nurses", "beh_stayhome", "beh_socgathering", "beh_distance", "beh_tellsymp", "beh_handwash", "fob_social", "fob_handshake", "fob_stores", "fob_curfew", "perceivedreaction_d", "govtrust_d", "govfact_d", "perceivedeffectiveness_d", "region", "population")
# Selecting columns we plan to use
data <- subset(all, select=to_use)
data
# Alleviating the problems in the next function caused by "_" in column names
colnames(data) <- gsub("_", ".", colnames(data))
# Calculating descriptive statistics
number <- function(x, na.rm = TRUE){return(sum(!is.na(x)))}
stats <- data %>%
summarise(across(where(is.numeric),
list(mean = mean, median = median, sd = sd, min = min, max = max, Q1=~quantile(., probs = 0.25), Q3=~quantile(., probs = 0.75)),
na.rm = TRUE)) %>%
pivot_longer(everything(), names_to = "name", values_to = "value") %>%
separate(name, c("variable", "statistic"), sep = "_") %>%
pivot_wider(names_from = statistic, values_from = value) %>%
arrange(variable) %>%
select(variable, mean, sd, min, Q1, median, Q3, max)
# Changing column names back
colnames(data) <- gsub("\\.", "_", colnames(data))
data
# Changing variable names to match
stats$variable <- gsub("\\.", "_", stats$variable)
options(scipen=999) # Disabling scientific notation (e.g., e+01)
# Putting variables in the original order
stats = na.omit(stats[match(to_use, stats$variable),])
stats
round_df <- function(x, digits) {
# round all numeric variables
# x: data frame
# digits: number of digits to round
numeric_columns <- sapply(x, mode) == 'numeric'
x[numeric_columns] <- round(x[numeric_columns], digits)
x
}
# Rounding the numbers
round_df(stats, 3)
| variable | mean | sd | min | Q1 | median | Q3 | max |
|---|---|---|---|---|---|---|---|
| mortality | 134.616 | 106.706 | 0.390 | 50.638 | 125.660 | 204.732 | 605.680 |
| che | 7.623 | 2.580 | 2.495 | 5.689 | 7.540 | 9.260 | 16.885 |
| beds | 3.871 | 2.506 | 0.630 | 2.203 | 3.120 | 5.128 | 13.050 |
| pop65 | 14.778 | 5.963 | 1.523 | 9.130 | 15.593 | 19.762 | 28.002 |
| popdens | 296.441 | 1043.790 | 3.576 | 34.626 | 99.851 | 218.329 | 8044.526 |
| urban | 75.385 | 16.109 | 18.585 | 66.545 | 79.732 | 87.029 | 100.000 |
| dphe | 35.833 | 14.427 | 13.667 | 25.914 | 34.440 | 48.149 | 70.604 |
| dghe | 63.963 | 14.562 | 28.732 | 51.746 | 64.810 | 74.086 | 85.323 |
| tobacco | 24.247 | 8.766 | 7.900 | 18.325 | 23.500 | 28.875 | 44.700 |
| procur | 0.517 | 0.504 | 0.000 | 0.000 | 1.000 | 1.000 | 1.000 |
| doctors | 33.812 | 16.049 | 4.650 | 23.620 | 32.370 | 43.598 | 80.130 |
| nurses | 69.248 | 50.026 | 2.800 | 26.465 | 61.645 | 102.375 | 216.700 |
| beh_stayhome | 83.462 | 7.333 | 58.678 | 81.242 | 84.828 | 87.905 | 94.411 |
| beh_socgathering | 92.326 | 4.991 | 75.297 | 90.952 | 94.353 | 95.561 | 99.000 |
| beh_distance | 78.763 | 9.116 | 47.866 | 74.073 | 81.322 | 85.271 | 90.561 |
| beh_tellsymp | 92.846 | 4.281 | 78.447 | 92.823 | 94.256 | 95.094 | 97.724 |
| beh_handwash | 91.686 | 2.830 | 83.720 | 90.474 | 92.055 | 93.745 | 96.566 |
| fob_social | 0.976 | 0.038 | 0.786 | 0.976 | 0.989 | 0.994 | 1.000 |
| fob_handshake | 0.967 | 0.037 | 0.745 | 0.959 | 0.977 | 0.986 | 1.000 |
| fob_stores | 0.806 | 0.167 | 0.197 | 0.768 | 0.870 | 0.910 | 0.971 |
| fob_curfew | 0.714 | 0.194 | 0.159 | 0.592 | 0.741 | 0.875 | 0.990 |
| perceivedreaction_d | 0.403 | 0.233 | 0.000 | 0.232 | 0.357 | 0.565 | 0.908 |
| govtrust_d | 0.575 | 0.245 | 0.090 | 0.376 | 0.584 | 0.799 | 0.957 |
| govfact_d | 0.633 | 0.239 | 0.092 | 0.491 | 0.709 | 0.823 | 0.979 |
| perceivedeffectiveness_d | 0.888 | 0.054 | 0.696 | 0.854 | 0.900 | 0.926 | 0.966 |
| population | 64586735.900 | 187687848.304 | 360563.000 | 5427584.250 | 10730269.500 | 47935001.500 | 1397715000.000 |
# Computing the correlation matrix for the numeric variables (all except region)
CorMatrix = cor(data[, !names(data) %in% c("region")] , use = "complete.obs")
cor.mtest <- function(mat, ...) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
# Matrix of p-values (H0: correlation = 0)
p.mat <- cor.mtest(data[, !names(data) %in% c("region")])
CorMatrix<-round(CorMatrix,2)
col <- colorRampPalette(c("deeppink", "hotpink", "lightpink", "floralwhite", "darkseagreen1", "darkslategray2", "dodgerblue4"))
#png(file="corr.png", res=300, width=4500, height=4500)
corrplot(CorMatrix, method="color", col=col(200),
type="upper", order="original",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, # Text label color and rotation
# Combine with significance
p.mat = p.mat, sig.level = c(0.01, 0.05, 0.1), insig ="label_sig",
# Hide correlation coefficient on the principal diagonal
diag=FALSE , number.cex=0.9, tl.cex=1, pch.cex=1)
Correlogram
# Exploring region
data %>% ggplot(aes(y = mortality, x = region)) +
geom_boxplot()
# Drawing a map
theme_set(theme_bw())
thismap = map_data("world")
all$`Country Name` <- recode(all$`Country Name`, "'Egypt, Arab Rep.' = 'Egypt'; 'United Kingdom' = 'UK'; 'Korea, Rep.' = 'South Korea'; 'Russian Federation' = 'Russia'; 'Slovak Republic' = 'Slovakia'; 'United States' = 'USA'")
# Setting colors
thismap <- mutate(thismap, fill = ifelse(region %in% all$`Country Name`[all$region == 'Americas'], "#FF7F11", ifelse(region %in% all$`Country Name`[all$region == 'Europe'], "#1446A0", ifelse(region %in% all$`Country Name`[all$region == 'Western Pacific'], "#DB3069", ifelse(region %in% all$`Country Name`[all$region == 'South-East Asia'], "#00AF54", ifelse(region %in% all$`Country Name`[all$region == 'Eastern Mediterranean'], "#F5D547", "white"))))))
# Using scale_fill_identity to set correct colors
#png(file="map.png", res=300, width=4500, height=3000)
ggplot(thismap, aes(long, lat, fill = fill, group=group)) +
geom_polygon(colour="gray") +
scale_fill_identity("WHO Region", guide = "legend", labels = c("South-East Asia", "Europe", "Western Pacific", "Eastern Mediterranean", "Americas", "unavailable")) +
theme(legend.position = "bottom", legend.key.size = unit(1,"cm"), legend.title=element_text(size=30),
legend.text=element_text(size=25))
World Map
formattable(data %>% group_by(region) %>%
summarise(no_rows = length(region)))
| region | no_rows |
|---|---|
| Americas | 12 |
| Eastern Mediterranean | 7 |
| Europe | 33 |
| South-East Asia | 2 |
| Western Pacific | 6 |
# There is too few instances of the three regions, so it makes sense to make an "other" category.
data = data %>% mutate(region = case_when(data$region=="Americas" ~ "Americas", data$region=="Europe" ~ "Europe", TRUE ~ "Other"))
# Exploring procur
data %>% ggplot(aes(y = mortality, x = as.factor(procur))) +
geom_boxplot() + scale_x_discrete(name = "procur")
# Drawing scatterplots of everything with mortality
#png(file="scatter.png", res=300, width=6000, height=8000)
data %>% dplyr::select(c(1:25, 27)) %>%
gather(-mortality, key = "var", value = "value") %>%
ggplot(aes(x = value, y = mortality)) +
geom_point() +
facet_wrap(~ var, ncol=3, scales = "free", shrink=TRUE) +
theme_bw() +
theme(axis.text = element_text(size = 14),
axis.title = element_text( size = 16, face = "bold" ),
legend.position="none",
strip.text = element_text(size = 20))
Scatterplots
# Doing the same things again with the newly rebuilt dataset
allnew <- read_excel("che_covid19-new.xlsx")[,2:37] %>% relocate(mortality, .before = `Country Code`)
# Now when we select countries with no less than 20 respondents in the Global Behaviors and Perceptions survey, there are 96 complete observations
allnew <- filter(allnew, n >= 20)
allnew
# There are some extra variables, but we will not use all of them
to_use1 <- c("mortality", "che", "pop65", "popdens", "urban", "dphe", "dghe", "doctors", "nurses", "beh_stayhome", "beh_socgathering", "beh_distance", "beh_tellsymp", "beh_handwash", "fob_social", "fob_handshake", "fob_stores", "fob_curfew", "perceivedreaction_d", "govtrust_d", "govfact_d", "perceivedeffectiveness_d", "region", "population")
# Selecting columns we plan to use
datanew <- subset(allnew, select=to_use1)
datanew
# Alleviating the problems in the next function caused by "_" in column names
colnames(datanew) <- gsub("_", ".", colnames(datanew))
# Calculating descriptive statistics
number <- function(x, na.rm = TRUE){return(sum(!is.na(x)))}
statsnew <- datanew %>%
summarise(across(where(is.numeric),
list(mean = mean, median = median, sd = sd, min = min, max = max, Q1=~quantile(., probs = 0.25), Q3=~quantile(., probs = 0.75)),
na.rm = TRUE)) %>%
pivot_longer(everything(), names_to = "name", values_to = "value") %>%
separate(name, c("variable", "statistic"), sep = "_") %>%
pivot_wider(names_from = statistic, values_from = value) %>%
arrange(variable) %>%
select(variable, mean, sd, min, Q1, median, Q3, max)
# Changing column names back
colnames(datanew) <- gsub("\\.", "_", colnames(datanew))
datanew
# Changing variable names to match
statsnew$variable <- gsub("\\.", "_", statsnew$variable)
options(scipen=999) # Disabling scientific notation (e.g., e+01)
# Putting variables in the original order
statsnew = na.omit(statsnew[match(to_use1, statsnew$variable),])
statsnew
round_df <- function(x, digits) {
# round all numeric variables
# x: data frame
# digits: number of digits to round
numeric_columns <- sapply(x, mode) == 'numeric'
x[numeric_columns] <- round(x[numeric_columns], digits)
x
}
# Rounding the numbers
round_df(statsnew, 3)
| variable | mean | sd | min | Q1 | median | Q3 | max |
|---|---|---|---|---|---|---|---|
| mortality | 118.112 | 101.068 | 0.390 | 30.935 | 105.145 | 184.107 | 605.680 |
| che | 7.005 | 2.522 | 2.343 | 5.280 | 6.878 | 8.663 | 16.885 |
| pop65 | 12.195 | 6.668 | 1.157 | 6.429 | 12.111 | 18.753 | 28.002 |
| popdens | 268.424 | 856.981 | 3.298 | 46.360 | 100.047 | 220.683 | 8044.526 |
| urban | 68.466 | 20.491 | 17.313 | 56.996 | 71.190 | 83.755 | 100.000 |
| dphe | 39.795 | 16.242 | 11.957 | 26.599 | 39.478 | 49.723 | 77.270 |
| dghe | 57.860 | 18.333 | 14.867 | 45.247 | 59.586 | 73.157 | 88.043 |
| doctors | 27.952 | 17.935 | 0.600 | 12.657 | 26.290 | 40.515 | 80.130 |
| nurses | 58.149 | 46.944 | 2.800 | 19.030 | 51.520 | 74.270 | 216.700 |
| beh_stayhome | 82.679 | 8.712 | 48.359 | 79.057 | 84.243 | 88.498 | 95.720 |
| beh_socgathering | 91.259 | 5.792 | 70.304 | 88.162 | 93.192 | 95.545 | 99.300 |
| beh_distance | 76.325 | 9.914 | 47.866 | 69.791 | 77.600 | 84.310 | 92.067 |
| beh_tellsymp | 92.308 | 4.673 | 78.447 | 90.600 | 93.771 | 95.013 | 99.289 |
| beh_handwash | 91.494 | 3.286 | 80.600 | 90.182 | 91.975 | 93.745 | 97.921 |
| fob_social | 0.978 | 0.033 | 0.786 | 0.977 | 0.989 | 0.995 | 1.000 |
| fob_handshake | 0.965 | 0.045 | 0.661 | 0.956 | 0.975 | 0.985 | 1.000 |
| fob_stores | 0.814 | 0.144 | 0.197 | 0.775 | 0.858 | 0.901 | 0.971 |
| fob_curfew | 0.747 | 0.179 | 0.159 | 0.645 | 0.776 | 0.892 | 1.000 |
| perceivedreaction_d | 0.403 | 0.245 | 0.000 | 0.215 | 0.368 | 0.565 | 0.952 |
| govtrust_d | 0.540 | 0.251 | 0.040 | 0.370 | 0.524 | 0.771 | 0.957 |
| govfact_d | 0.600 | 0.244 | 0.092 | 0.400 | 0.653 | 0.803 | 0.979 |
| perceivedeffectiveness_d | 0.873 | 0.070 | 0.619 | 0.837 | 0.890 | 0.922 | 1.000 |
| population | 69540265.083 | 201817550.622 | 360563.000 | 5658078.250 | 12160829.500 | 53931840.500 | 1397715000.000 |
# Computing the correlation matrix for the numeric variables (all except region)
CorMatrix1 = cor(datanew[, !names(datanew) %in% c("region")] , use = "complete.obs")
cor.mtest <- function(mat, ...) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
# Matrix of p-values (H0: correlation = 0)
p.mat1 <- cor.mtest(datanew[, !names(datanew) %in% c("region")])
CorMatrix1<-round(CorMatrix1,2)
col <- colorRampPalette(c("deeppink", "hotpink", "lightpink", "floralwhite", "darkseagreen1", "darkslategray2", "dodgerblue4"))
#png(file="newcorr.png", res=300, width=4500, height=4500)
corrplot(CorMatrix1, method="color", col=col(200),
type="upper", order="original",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, # Text label color and rotation
# Combine with significance
p.mat = p.mat1, sig.level = c(0.01, 0.05, 0.1), insig ="label_sig",
# Hide correlation coefficient on the principal diagonal
diag=FALSE , number.cex=0.9, tl.cex=1, pch.cex=1)
# Заново каждый раз, когда library(corrplot)
# trace(corrplot, edit=TRUE)
# заменить на 443 строке
# place_points = function(sig.locs, point) {
#text(pos.pNew[, 1][sig.locs], pos.pNew[, 2][sig.locs],
# labels = point, col = pch.col, cex = pch.cex,
#lwd = 2)
# НА
#place_points = function(sig.locs, point) {
#text(pos.pNew[, 1][sig.locs], (pos.pNew[, 2][sig.locs])+0.25,
#labels = point, col = pch.col, cex = pch.cex,
#lwd = 2)
New Correlogram
formattable(datanew %>% group_by(region) %>%
summarise(no_rows = length(region)))
| region | no_rows |
|---|---|
| Africa | 7 |
| Americas | 18 |
| Eastern Mediterranean | 13 |
| Europe | 43 |
| South-East Asia | 6 |
| Western Pacific | 9 |
# Exploring region
datanew %>% ggplot(aes(y = mortality, x = region)) +
geom_boxplot() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Drawing scatterplots of everything with mortality
#png(file="newscatter.png", res=300, width=6000, height=8000)
datanew %>% dplyr::select(c(1:22, 24)) %>%
gather(-mortality, key = "var", value = "value") %>%
ggplot(aes(x = value, y = mortality)) +
geom_point() +
facet_wrap(~ var, ncol=3, scales = "free", shrink=TRUE) +
theme_bw() +
theme(axis.text = element_text(size = 14),
axis.title = element_text( size = 16, face = "bold" ),
legend.position="none",
strip.text = element_text(size = 20))
New Scatterplots
# Drawing a map
theme_set(theme_bw())
thismap1 = map_data("world")
allnew
allnew$`Country Name` <- recode(allnew$`Country Name`, "'Egypt, Arab Rep.' = 'Egypt'; 'United Kingdom' = 'UK'; 'Korea, Rep.' = 'South Korea'; 'Russian Federation' = 'Russia'; 'Slovak Republic' = 'Slovakia'; 'United States' = 'USA'; 'Iran, Islamic Rep.' = 'Iran'")
# Setting colors
thismap1 <- mutate(thismap1, fill = ifelse(region %in% allnew$`Country Name`[allnew$region == 'Americas'], "#FF7F11", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Europe'], "#1446A0", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Western Pacific'], "#DB3069", ifelse(region %in% allnew$`Country Name`[allnew$region == 'South-East Asia'], "#00AF54", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Eastern Mediterranean'], "#F5D547", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Africa'], "magenta", "white")))))))
# Using scale_fill_identity to set correct colors
#png(file="newmap (1).png", res=300, width=4500, height=3000)
ggplot(thismap1, aes(long, lat, fill = fill, group=group)) +
geom_polygon(colour="gray") +
scale_fill_identity("WHO Region", guide = "legend", labels = c("South-East Asia", "Europe", "Western Pacific", "Eastern Mediterranean", "Americas", "Africa", "u/a")) +
theme(legend.position = "bottom", legend.key.size = unit(1,"cm"), legend.title=element_text(size=30),
legend.text=element_text(size=25))
New World Map
# Drawing a scatterplot over a limited range of density for better visibility
datanew %>%
ggplot(aes(popdens, mortality)) +
geom_jitter(width = 0.25, alpha = 0.5) + xlim(0, 1000)
# Drawing a scatterplot over a limited range of population for better visibility
datanew %>%
ggplot(aes(population, mortality)) +
geom_jitter(width = 0.25, alpha = 0.5) +
scale_x_continuous(labels = unit_format(unit = "M", scale = 1e-6), limits = c(0, 200000000), name = "population (mln.)")
allnew <- read_excel("che_covid19-new.xlsx")[,2:37] %>%
relocate(mortality, .before = `Country Code`)
# Now when we select countries with no less than 20 respondents in the Global Behaviors and Perceptions survey, there are 96 complete observations
allnew <- filter(allnew, n >= 20)
allnew
# Adding a new variable income level
incomelvl <- read_excel("incomelvl.xlsx")
incomelvl
allnew <- merge(x = allnew, y = incomelvl, by = "Country Code", all.x = TRUE) %>%
relocate(mortality, .before = `Country Code`)
allnew
# Exploring incomelvl
theme_set(theme_bw())
formattable(datanew %>% group_by(incomelvl) %>%
summarise(no_rows = length(incomelvl)))
| incomelvl | no_rows |
|---|---|
| LIC | 4 |
| LMC | 18 |
| UMC | 28 |
| HIC | 46 |
datanew %>% ggplot(aes(y = mortality, x = incomelvl)) +
geom_boxplot() #+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
datanew %>% ggplot(aes(y = che, x = incomelvl)) +
geom_boxplot()
reg_ols_1 <- lm(mortality ~ che, data = datanew)
cov_ols_1 <- vcovHC(reg_ols_1, type = "HC0")
se_ols_1 <- sqrt(diag(cov_ols_1))
coeftest(reg_ols_1, df = Inf, vcov = cov_ols_1)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 33.6226 23.7954 1.4130 0.1577
che 12.0605 2.9917 4.0313 0.00005547 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_2 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens, data = datanew)
cov_ols_2 <- vcovHC(reg_ols_2, type = "HC0")
se_ols_2 <- sqrt(diag(cov_ols_2))
coeftest(reg_ols_2, df = Inf, vcov = cov_ols_2)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.7990683 28.8566442 -0.2703 0.78695
che 6.4684294 4.3189153 1.4977 0.13421
pop65 2.9194319 2.5253820 1.1560 0.24767
urban 0.5368160 0.7553619 0.7107 0.47729
doctors 0.9794441 0.9662417 1.0137 0.31074
nurses -0.6215683 0.2906442 -2.1386 0.03247 *
dghe 0.3765267 0.6021621 0.6253 0.53178
popdens -0.0178004 0.0080409 -2.2137 0.02685 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_3 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region, data = datanew)
cov_ols_3 <- vcovHC(reg_ols_3, type = "HC0")
se_ols_3 <- sqrt(diag(cov_ols_3))
coeftest(reg_ols_3, df = Inf, vcov = cov_ols_3)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 7.2935398 30.5413722 0.2388 0.8112541
che 0.6231485 6.1960348 0.1006 0.9198901
pop65 4.2552531 2.9769151 1.4294 0.1528844
urban 0.0192724 0.6831571 0.0282 0.9774940
doctors 0.1674631 0.8513386 0.1967 0.8440579
nurses -0.4154245 0.2662049 -1.5605 0.1186313
dghe 0.1963998 0.5267080 0.3729 0.7092364
popdens -0.0035068 0.0056073 -0.6254 0.5317112
regionAmericas 138.9955990 41.0764961 3.3838 0.0007148 ***
regionEastern Mediterranean 37.8998494 32.4633225 1.1675 0.2430219
regionEurope 77.3828482 37.8600206 2.0439 0.0409615 *
regionSouth-East Asia -2.2814351 25.9090979 -0.0881 0.9298327
regionWestern Pacific -36.4469465 35.1404621 -1.0372 0.2996525
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_4 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region + beh_stayhome + beh_socgathering + beh_distance + beh_tellsymp + beh_handwash + fob_curfew, data = datanew)
cov_ols_4 <- vcovHC(reg_ols_4, type = "HC0")
se_ols_4 <- sqrt(diag(cov_ols_4))
coeftest(reg_ols_4, df = Inf, vcov = cov_ols_4)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 315.6203964 194.6502705 1.6215 0.10492
che -4.4513361 7.2794672 -0.6115 0.54087
pop65 6.0516146 3.3179411 1.8239 0.06817 .
urban 0.4242408 0.7379454 0.5749 0.56536
doctors 0.3825550 0.8009964 0.4776 0.63294
nurses -0.1422158 0.2687819 -0.5291 0.59673
dghe -0.4739245 0.6012689 -0.7882 0.43058
popdens -0.0080841 0.0066393 -1.2176 0.22337
regionAmericas 108.9997804 39.8089597 2.7381 0.00618 **
regionEastern Mediterranean -16.6879280 40.3347231 -0.4137 0.67907
regionEurope 42.1476912 43.6497995 0.9656 0.33425
regionSouth-East Asia -35.1785413 37.1911593 -0.9459 0.34421
regionWestern Pacific -57.3234413 38.3044024 -1.4965 0.13452
beh_stayhome 3.3388241 1.7920384 1.8631 0.06244 .
beh_socgathering -4.8276446 2.1844828 -2.2100 0.02711 *
beh_distance 1.5145785 1.0802867 1.4020 0.16091
beh_tellsymp 1.2667673 1.6091127 0.7872 0.43114
beh_handwash -4.2854623 2.3890173 -1.7938 0.07284 .
fob_curfew 70.4266393 73.7354035 0.9551 0.33951
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_5 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region + beh_stayhome + beh_socgathering + beh_distance + beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che*incomelvl, data = datanew)
cov_ols_5 <- vcovHC(reg_ols_5, type = "HC0")
se_ols_5 <- sqrt(diag(cov_ols_5))
coeftest(reg_ols_5, df = Inf, vcov = cov_ols_5)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 438.0531775 213.6556502 2.0503 0.04034 *
che -19.4257130 9.6939004 -2.0039 0.04508 *
pop65 8.2430275 3.3517601 2.4593 0.01392 *
urban -0.3455764 0.7613494 -0.4539 0.64990
doctors 0.5677534 0.6779376 0.8375 0.40233
nurses 0.2828706 0.2333618 1.2122 0.22545
dghe -0.3514454 0.5467960 -0.6427 0.52040
popdens -0.0022406 0.0049432 -0.4533 0.65035
regionAmericas 69.7980221 31.8857323 2.1890 0.02860 *
regionEastern Mediterranean -30.2692253 26.3589037 -1.1483 0.25082
regionEurope -11.9103576 42.0379380 -0.2833 0.77693
regionSouth-East Asia -58.4882922 39.3263065 -1.4873 0.13695
regionWestern Pacific -87.7247057 35.6810732 -2.4586 0.01395 *
beh_stayhome 3.7691104 1.8577889 2.0288 0.04248 *
beh_socgathering -4.3915761 2.2316736 -1.9678 0.04909 *
beh_distance 1.7291293 1.2440892 1.3899 0.16457
beh_tellsymp 0.0574909 1.5489442 0.0371 0.97039
beh_handwash -4.4255514 2.4576044 -1.8008 0.07174 .
fob_curfew 74.1821868 58.3881539 1.2705 0.20391
incomelvlLMC -141.4104774 58.6057055 -2.4129 0.01583 *
incomelvlUMC -73.6479444 69.0907199 -1.0660 0.28644
incomelvlHIC -40.0951582 77.3458167 -0.5184 0.60419
che:incomelvlLMC 25.8380200 12.2794892 2.1042 0.03536 *
che:incomelvlUMC 23.6900038 11.7720126 2.0124 0.04418 *
che:incomelvlHIC 8.9878346 9.9291733 0.9052 0.36536
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(reg_ols_1, reg_ols_2, reg_ols_3, reg_ols_4, reg_ols_5,
se = list(se_ols_1, se_ols_2, se_ols_3, se_ols_4, se_ols_5),
title = "Regression results (n=96)",
omit = "Constant",
keep.stat = "n",
notes = "Robust standard errors in parentheses",
type = 'latex')
| Dependent variable: | |||||
| mortality | |||||
| (1) | (2) | (3) | (4) | (5) | |
| che | 12.060*** | 6.468 | 0.623 | -4.451 | -19.426** |
| (2.992) | (4.319) | (6.196) | (7.279) | (9.694) | |
| pop65 | 2.919 | 4.255 | 6.052* | 8.243** | |
| (2.525) | (2.977) | (3.318) | (3.352) | ||
| urban | 0.537 | 0.019 | 0.424 | -0.346 | |
| (0.755) | (0.683) | (0.738) | (0.761) | ||
| doctors | 0.979 | 0.167 | 0.383 | 0.568 | |
| (0.966) | (0.851) | (0.801) | (0.678) | ||
| nurses | -0.622** | -0.415 | -0.142 | 0.283 | |
| (0.291) | (0.266) | (0.269) | (0.233) | ||
| dghe | 0.377 | 0.196 | -0.474 | -0.351 | |
| (0.602) | (0.527) | (0.601) | (0.547) | ||
| popdens | -0.018** | -0.004 | -0.008 | -0.002 | |
| (0.008) | (0.006) | (0.007) | (0.005) | ||
| regionAmericas | 138.996*** | 109.000*** | 69.798** | ||
| (41.076) | (39.809) | (31.886) | |||
| regionEastern Mediterranean | 37.900 | -16.688 | -30.269 | ||
| (32.463) | (40.335) | (26.359) | |||
| regionEurope | 77.383** | 42.148 | -11.910 | ||
| (37.860) | (43.650) | (42.038) | |||
| regionSouth-East Asia | -2.281 | -35.179 | -58.488 | ||
| (25.909) | (37.191) | (39.326) | |||
| regionWestern Pacific | -36.447 | -57.323 | -87.725** | ||
| (35.140) | (38.304) | (35.681) | |||
| beh_stayhome | 3.339* | 3.769** | |||
| (1.792) | (1.858) | ||||
| beh_socgathering | -4.828** | -4.392** | |||
| (2.184) | (2.232) | ||||
| beh_distance | 1.515 | 1.729 | |||
| (1.080) | (1.244) | ||||
| beh_tellsymp | 1.267 | 0.057 | |||
| (1.609) | (1.549) | ||||
| beh_handwash | -4.285* | -4.426* | |||
| (2.389) | (2.458) | ||||
| fob_curfew | 70.427 | 74.182 | |||
| (73.735) | (58.388) | ||||
| incomelvlLMC | -141.410** | ||||
| (58.606) | |||||
| incomelvlUMC | -73.648 | ||||
| (69.091) | |||||
| incomelvlHIC | -40.095 | ||||
| (77.346) | |||||
| che:incomelvlLMC | 25.838** | ||||
| (12.279) | |||||
| che:incomelvlUMC | 23.690** | ||||
| (11.772) | |||||
| che:incomelvlHIC | 8.988 | ||||
| (9.929) | |||||
| Observations | 96 | 96 | 96 | 96 | 96 |
| Note: | *p<0.1; **p<0.05; ***p<0.01 | ||||
| Robust standard errors in parentheses | |||||
# Regressions (1)-(3) on a larger sample
many <- read_excel("che_covid19-new.xlsx")[,2:37] %>%
relocate(mortality, .before = `Country Code`)
many
reg_ols_11 <- lm(mortality ~ che, data = many)
cov_ols_11 <- vcovHC(reg_ols_11, type = "HC0")
se_ols_11 <- sqrt(diag(cov_ols_11))
coeftest(reg_ols_11, df = Inf, vcov = cov_ols_11)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 19.5433 17.0158 1.1485 0.2507
che 10.4748 2.6424 3.9642 0.00007365 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_22 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens, data = many)
cov_ols_22 <- vcovHC(reg_ols_22, type = "HC0")
se_ols_22 <- sqrt(diag(cov_ols_22))
coeftest(reg_ols_22, df = Inf, vcov = cov_ols_22)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -33.9768080 22.3549755 -1.5199 0.1285420
che 2.5378506 2.2068385 1.1500 0.2501464
pop65 5.6772044 1.9893389 2.8538 0.0043198 **
urban 0.7709671 0.4248587 1.8146 0.0695787 .
doctors 0.7266031 0.7512339 0.9672 0.3334376
nurses -0.4870531 0.2520901 -1.9321 0.0533522 .
dghe 0.3209735 0.3040956 1.0555 0.2911958
popdens -0.0185701 0.0049825 -3.7270 0.0001937 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_33 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region, data = many)
cov_ols_33 <- vcovHC(reg_ols_33, type = "HC0")
se_ols_33 <- sqrt(diag(cov_ols_33))
coeftest(reg_ols_33, df = Inf, vcov = cov_ols_33)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -28.9740418 20.3121227 -1.4264 0.1537411
che 0.9854109 2.0853514 0.4725 0.6365418
pop65 4.3977674 1.7977340 2.4463 0.0144337 *
urban 0.4661615 0.4049519 1.1512 0.2496694
doctors -0.2366145 0.7367834 -0.3211 0.7481003
nurses -0.3959232 0.2389396 -1.6570 0.0975192 .
dghe 0.4638272 0.3064896 1.5134 0.1301898
popdens -0.0083562 0.0048076 -1.7381 0.0821876 .
regionAmericas 97.1379134 26.9160554 3.6089 0.0003075 ***
regionEastern Mediterranean 30.7295222 15.4549796 1.9883 0.0467758 *
regionEurope 77.6972301 26.3780231 2.9455 0.0032240 **
regionSouth-East Asia 7.4116247 15.1387496 0.4896 0.6244313
regionWestern Pacific -26.5669946 14.5254225 -1.8290 0.0673996 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(reg_ols_11, reg_ols_22, reg_ols_33,
se = list(se_ols_11, se_ols_22, se_ols_33),
title = "Regression results (n=160)",
omit = "Constant",
keep.stat = "n",
notes = "Robust standard errors in parentheses",
type = 'latex')
| Dependent variable: | |||
| mortality | |||
| (1) | (2) | (3) | |
| che | 10.475*** | 2.538 | 0.985 |
| (2.642) | (2.207) | (2.085) | |
| pop65 | 5.677*** | 4.398** | |
| (1.989) | (1.798) | ||
| urban | 0.771* | 0.466 | |
| (0.425) | (0.405) | ||
| doctors | 0.727 | -0.237 | |
| (0.751) | (0.737) | ||
| nurses | -0.487* | -0.396* | |
| (0.252) | (0.239) | ||
| dghe | 0.321 | 0.464 | |
| (0.304) | (0.306) | ||
| popdens | -0.019*** | -0.008* | |
| (0.005) | (0.005) | ||
| regionAmericas | 97.138*** | ||
| (26.916) | |||
| regionEastern Mediterranean | 30.730** | ||
| (15.455) | |||
| regionEurope | 77.697*** | ||
| (26.378) | |||
| regionSouth-East Asia | 7.412 | ||
| (15.139) | |||
| regionWestern Pacific | -26.567* | ||
| (14.525) | |||
| Observations | 160 | 160 | 160 |
| Note: | *p<0.1; **p<0.05; ***p<0.01 | ||
| Robust standard errors in parentheses | |||
car::linearHypothesis(reg_ols_5, "che + che:incomelvlLMC = 0", test="Chisq", white.adjust="hc0")
Linear hypothesis test
Hypothesis:
che + che:incomelvlLMC = 0
Model 1: restricted model
Model 2: mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens +
region + beh_stayhome + beh_socgathering + beh_distance +
beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che *
incomelvl
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 72
2 71 1 0.254 0.6143
car::linearHypothesis(reg_ols_5, "che + che:incomelvlUMC = 0", test="Chisq", white.adjust="hc0")
Linear hypothesis test
Hypothesis:
che + che:incomelvlUMC = 0
Model 1: restricted model
Model 2: mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens +
region + beh_stayhome + beh_socgathering + beh_distance +
beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che *
incomelvl
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 72
2 71 1 0.1122 0.7376
car::linearHypothesis(reg_ols_5, "che + che:incomelvlHIC = 0", test="Chisq", white.adjust="hc0")
Linear hypothesis test
Hypothesis:
che + che:incomelvlHIC = 0
Model 1: restricted model
Model 2: mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens +
region + beh_stayhome + beh_socgathering + beh_distance +
beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che *
incomelvl
Note: Coefficient covariance matrix supplied.
Res.Df Df Chisq Pr(>Chisq)
1 72
2 71 1 2.6075 0.1064